# Dickstein, Ho, and Mark (2023)
# In this script, we recreate the switcher sample using the larger sample

# ~ # ~ # ~ # ~ #
# Preliminaries #
# ~ # ~ # ~ # ~ #

# Every relative path used below (e.g. "orig/...") is resolved against this
# working directory, so it must be set before anything is read or written.
setwd("../library")
# Shared project setup; its contents are not visible from this script.
# NOTE(review): presumably loads data.table and defines Shortyear.Vec and
# MakeLatexTable, which are used below -- confirm.
source("PreliminariesCode.R")

# Cutoff on the share of a group's subscribers that must be "forced"
# (see the definition of forced_out) for the group to count as forced out.
p_forced_boundary <- 0.8

# ~ # ~ # ~ # ~ #
# Loading Data  #
# ~ # ~ # ~ # ~ #

# Import the full member-month datasets for 2015 and 2016.
# These datasets include all patients with medical or non-RX plans, including
# those in large group employer and other plans.
member_month_cols <- c(
  "patID", "subscriberID", "market", "rxflag", "medflag", "Months", "relation")

OrigDat2015 <- fread(paste0("orig/MM", 2015, "_u.txt"), select = member_month_cols)
OrigDat2015[, year := 2015]
OrigDat2016 <- fread(paste0("orig/MM", 2016, "_u.txt"), select = member_month_cols)
OrigDat2016[, year := 2016]

# Stack both years into a single table.
OrigDatTotal <- rbind(OrigDat2015, OrigDat2016)

# Number of covered months, derived from the length of the Months field.
# The column is referenced by its full name `Months`; writing `$Month` would
# only resolve through partial name matching of `$`.
# NOTE(review): (nchar + 1) / 3 presumably assumes Months is a list of
# two-character month codes separated by one character -- confirm.
OrigDatTotal[, nmonths := (nchar(Months) + 1) / 3]
OrigDatTotal[, Months := NULL]

# Loading Subscriber Data: one file for each of entries 5 through 7 of
# Shortyear.Vec (defined outside this script), stacked into one table.
# Building the list with lapply avoids the empty placeholder slots 1-4 that
# an index-assigned list would contain.
SubDat_List <- lapply(5:7, function(idx) {
  fread(paste0("orig/SubData", Shortyear.Vec[idx], "7.csv"))
})
SubDat <- do.call("rbind", SubDat_List)

# Loading personkey crosswalk
PersonkeyCrosswalk <- fread("orig/Pat2PKeyCrosswalkNew.txt")

# ~ # ~ # ~ # ~ #
# Cleaning Data #
# ~ # ~ # ~ # ~ #

# Subscriber Data
## Attach the personkey: subscriberid in SubDat is matched against patID in
## the crosswalk, keeping every subscriber row (left join).
SubDat <- merge(
  SubDat, PersonkeyCrosswalk,
  by.x = "subscriberid", by.y = "patID",
  all.x = TRUE)

## Report how many subscriber rows found no personkey, then drop those rows.
print(paste0(
  "There are ", SubDat[, sum(is.na(personkeynew))],
  " subscriber observations (out of ", SubDat[, .N],
  " total) without a personkey. These are removed in creating the switcher sample."))
SubDat <- SubDat[!is.na(personkeynew)]

# Market Next Year Data
## Adding new personkey (left join keeps every member-month row).
OrigDatTotal <- merge(
  OrigDatTotal, PersonkeyCrosswalk,
  by = "patID",
  all.x = TRUE)

## Report both the count and the share of rows lacking a personkey, so the
## message reads correctly (a bare share is not a number of observations).
n_orig_missing_key <- sum(is.na(OrigDatTotal$personkeynew))
print(paste0(
  "There are ", n_orig_missing_key,
  " observations (out of ", nrow(OrigDatTotal),
  " total; share ", n_orig_missing_key / nrow(OrigDatTotal),
  ") without a personkey."))

## Collapse the Original data into a personkey-year level:
## within each personkey-year, keep the single row with the most months
## (which.max returns the first row on ties).
## Rows with a missing personkey are pooled under one "NA<year>999" key per
## year, because paste0 renders NA as the text "NA".
OrigDatTotal[, pkyear := paste0(personkeynew, year, "999")]
longest_row_idx <- OrigDatTotal[, .I[which.max(nmonths)], by = list(pkyear)]$V1
OrigDatYearLevel <- OrigDatTotal[longest_row_idx]

## year.lag is the year these values are attached to when merged back:
## a row observed in year t describes "next year" for year t - 1.
OrigDatYearLevel[, year.lag := year - 1]
OrigDatYearLevel <- OrigDatYearLevel[, .(
  personkeynew,
  market.nextyr = market,
  relation.nextyr = relation,
  nmonths,
  year.lag)]

# ~ # ~ # ~ # ~ # ~ # ~ #
# Merging Data Together #
# ~ # ~ # ~ # ~ # ~ # ~ #

# First, merging subscriber data with subscriber data next year
# (to ensure that these markings take precedence).
print(paste0("There were ", nrow(SubDat), " observations before the merge."))

## Person-year summary of market participation in the subscriber data.
## The ".lead" suffix labels the variables as next-year values once they are
## merged back one year earlier.
## NOTE(review): when a person-year has no "Individual" rows the product
## inside which.max is all zeros, so which.max returns the first row and
## indsubscriberid.lead holds a non-individual subscriberid -- confirm that
## downstream code only reads it when indmarket.lead is 1.
SubMarketType <- SubDat[, .(
  indmarket.lead = (sum(markettype == "Individual") > 0),
  sgmarket.lead = (sum(markettype == "SmallGroup") > 0),
  nind = sum(markettype == "Individual"),
  indsubscriberid.lead = subscriberid[
    which.max(nummonthsmode * as.numeric(markettype == "Individual"))]),
  by = c("personkeynew", "year")]

## Shift the summary back one year so that year t + 1 matches year t rows.
SubMarketType[, year.lag := year - 1]

## Left join on (personkeynew, year == year.lag).
## NOTE(review): SubMarketType's own `year` column is not a join key, so it
## is carried into SubDat as an extra column (suffixed by merge in current
## data.table versions) -- confirm nothing downstream depends on it.
SubDat <- merge(
  SubDat, SubMarketType,
  by.x = c("personkeynew", "year"),
  by.y = c("personkeynew", "year.lag"),
  all.x = TRUE)

## Convert the logical flags to 0/1 and treat "no record next year" as 0.
SubDat[, indmarket.lead := as.numeric(indmarket.lead)]
SubDat[is.na(indmarket.lead), indmarket.lead := 0]
SubDat[, sgmarket.lead := as.numeric(sgmarket.lead)]
SubDat[is.na(sgmarket.lead), sgmarket.lead := 0]

print(paste0("There were ", nrow(SubDat), " observations after the merge."))

# Second, attach next year's market and relation from the "Original" data to
# see if subscribers ended up in other types of insurance. The row counts
# printed before and after should agree if the join adds no duplicates.
rows_before_orig_merge <- nrow(SubDat)
print(paste0("There were ", rows_before_orig_merge, " observations before the merge."))

SubDat <- merge(
  SubDat, OrigDatYearLevel,
  by.x = c("personkeynew", "year"),
  by.y = c("personkeynew", "year.lag"),
  all.x = TRUE)

rows_after_orig_merge <- nrow(SubDat)
print(paste0("There were ", rows_after_orig_merge, " observations after the merge."))

# Last, merge subscriber data with a dataset of small group contract numbers.
# This determines whether the contract number disappears or not.
print(paste0("There were ", nrow(SubDat), " observations before the merge."))

## Creating contract number dataset: one row per (year, contractnum) seen in
## SubDat, flagged with con.exists.lead = 1.
ConDat <- SubDat[, .(con.exists.lead = 1), by = c("year", "contractnum")]
## Re-key each contract-year to the previous year, then drop the original
## year so it does not clash with SubDat's year in the merge.
ConDat[, year.lag := year - 1]
ConDat[, year := NULL]

## Merging: contracts absent next year get con.exists.lead = NA here.
SubDat <- merge(
  SubDat, ConDat,
  by.x = c("contractnum", "year"),
  by.y = c("contractnum", "year.lag"),
  all.x = TRUE)
print(paste0("There were ", nrow(SubDat), " observations after the merge."))

# ~ # ~ # ~ # ~ # # ~ # ~ #
# Finding Switchers       #
# ~ # ~ # ~ # ~ # # ~ # ~ #

# Keep only the years in which switchers could exist: 2014 and 2015.
SubDat <- SubDat[year %in% 2014:2015]

# In-sample subscribers: small-group subscribers who show up in the
# individual market of the subscriber data in the following year.
SubDat[, in_sample.lead := fcase(
  markettype == "SmallGroup" & indmarket.lead == 1, 1,
  default = 0)]

# Destination next year. fcase takes the first matching condition, so the
# in-sample flag wins over age, and age wins over the outside market codes.
# NOTE(review): market.nextyr == 5 matches none of the ranges below and so
# falls to "Untracked", while destinationtype treats codes 1:11 (including 5)
# as tracked -- confirm whether code 5 occurs and how it should be labelled.
SubDat[, destination := fcase(
  in_sample.lead == 1,      "Individual Market (Data)",
  age >= 64,                "Medicare",
  market.nextyr %in% 1:2,   "Individual Market (Outside)",
  market.nextyr %in% 3:4,   "Small Group (Outside)",
  market.nextyr %in% 6:7,   "Large Group",
  market.nextyr %in% 8:9,   "Self-Funded",
  market.nextyr %in% 10:11, "Other",
  default = "Untracked")]

# Same precedence, but splitting tracked enrollees by whether they are the
# subscriber or a spouse/dependent on next year's plan.
SubDat[, destinationtype := fcase(
  in_sample.lead == 1, "Individual Market (Data)",
  age >= 64,           "Medicare",
  market.nextyr %in% 1:11 & relation.nextyr == "SUBSCRIBER",
    "Tracked - Subscriber",
  market.nextyr %in% 1:11 & relation.nextyr != "SUBSCRIBER",
    "Tracked - Spouse/Dependent",
  default = "Untracked")]

# A tracked subscriber is one who is in some kind of insurance next year,
# i.e. whose destination is anything other than "Untracked".
SubDat[, tracked := as.numeric(destination != "Untracked")]

# A closed-out subscriber is a small-group subscriber whose contract number
# does not appear in the following year.
# Contracts with no match next year have a missing flag; treat that as 0.
SubDat[is.na(con.exists.lead), con.exists.lead := 0]
# Rows matching neither condition (non-small-group rows whose contract is
# gone) are left as NA because no default is supplied.
SubDat[, closed_out := fcase(
  markettype == "SmallGroup" & con.exists.lead == 0, 1,
  con.exists.lead == 1, 0)]

# A forced_out subscriber is a closed-out subscriber in a contract-year where
# the share of "forced" subscribers strictly exceeds p_forced_boundary.
#
# num_forced counts, within each (year, contractnum), the subscribers who are
# in-sample individual-market switchers, untracked, or tracked as a
# spouse/dependent; n_group is the number of rows in that contract-year.
# NOTE(review): subscribers tracked as the subscriber of an outside
# individual-market plan are not counted in num_forced -- confirm intent.
SubDat[, `:=`(
  num_forced = sum(
    in_sample.lead == 1 |
      tracked == 0 |
      destinationtype == "Tracked - Spouse/Dependent"),
  n_group = .N),
  by = c("year", "contractnum")]

SubDat[, p_forced := num_forced / n_group]
SubDat[, forced_out := as.numeric(closed_out == 1 & p_forced > p_forced_boundary)]

# ~ # ~ # ~ # ~ # # ~ # ~ # ~ # # ~ # ~ #
# Cleaning Data Sample and Saving       #
# ~ # ~ # ~ # ~ # # ~ # ~ # ~ # # ~ # ~ #

# Restrict to small-group subscribers.
SubDat <- SubDat[markettype == "SmallGroup"]

# Apply the same sample restrictions used in the counterfactuals (further
# subsetting happens before estimation) and record a count after each step.
# NOTE(review): the "Full" entry counts all small-group rows, whereas every
# later entry counts only rows with forced_out == 1 -- confirm this is the
# intended comparison.
n_stage <- c("Full" = nrow(SubDat))

SubDat <- SubDat[!is.na(age)]
n_stage <- c(n_stage, "Non-Missing Age" = SubDat[forced_out == 1, .N])

SubDat <- SubDat[age >= 20 & age <= 65]
n_stage <- c(n_stage, "Age in 20-65 Range" = SubDat[forced_out == 1, .N])

SubDat <- SubDat[!is.na(nspouse)]
n_stage <- c(n_stage, "Non-Missing nspouse" = SubDat[forced_out == 1, .N])

SubDat <- SubDat[!is.na(ndeps)]
n_stage <- c(n_stage, "Non-Missing ndeps" = SubDat[forced_out == 1, .N])

SubDat <- SubDat[!is.na(sum_concurrent_risk)]
n_stage <- c(n_stage, "Non-Missing ACG" = SubDat[forced_out == 1, .N])

SubDat <- SubDat[!is.na(best_guess_incomeoverFPL)]
n_stage <- c(n_stage, "Non-Missing Income" = SubDat[forced_out == 1, .N])

print("Subsetting Table:")
print(n_stage)

# ~ # ~ # ~ # ~ # ~ # ~ # 
# Publication tables    #
# ~ # ~ # ~ # ~ # ~ # ~ #

# New switcher sample
# Column label: plans with best_guess_metal in 2:5 are "Non-Grandfathered",
# everything else (including missing) is "Grandfathered".
SubDat[, col := ifelse(best_guess_metal %in% 2:5, "Non-Grandfathered", "Grandfathered")]

# Row label: destination category, first matching condition wins. Anything
# not matched (e.g. age >= 64, or another tracked market without spousal
# coverage) falls through to "Other".
SubDat[, row := fcase(
  in_sample.lead == 1, "Individual Market",
  tracked == 0, "Uninsured",
  market.nextyr %in% 6:9, "Large Group Market",
  relation.nextyr == "SPOUSE", "Coverage Through Spouse",
  default = "Other")]

# Fix the display order through explicit factor levels. This keeps the table
# shape stable (zero-count rows/columns are retained) instead of relying on a
# positional reorder that fails when a category is absent from the data.
transition_rows <- c(
  "Individual Market",
  "Coverage Through Spouse",
  "Large Group Market",
  "Other",
  "Uninsured")
transition_cols <- c("Grandfathered", "Non-Grandfathered")

# Cross-tabulate forced-out subscribers only.
TransitionTable <- with(
  SubDat[forced_out == 1],
  table(
    row = factor(row, levels = transition_rows),
    col = factor(col, levels = transition_cols)))

# Append the margins: a "Total" column, then a "Total" row.
TransitionTable <- cbind(TransitionTable, rowSums(TransitionTable))
TransitionTable <- rbind(TransitionTable, colSums(TransitionTable))
colnames(TransitionTable)[ncol(TransitionTable)] <- "Total"
rownames(TransitionTable)[nrow(TransitionTable)] <- "Total"

# Make table split up by Grandfathered/non-grandfathered
# Create the output folder (and any missing parents); stay quiet when it
# already exists so the script can be re-run.
dir.create(
  "../analysis/tablesandfigures/release/switcher_transitions",
  recursive = TRUE,
  showWarnings = FALSE)

# MakeLatexTable is a project helper defined outside this script; its printed
# output is captured and written to the .tex file.
capture.output(
  MakeLatexTable(
    startcodelist = list(
      "\\begin{tabular}{@{\\extracolsep{5pt}}lccc}",
      # Empty spacer row; "\\\\" emits the LaTeX row terminator \\ (a lone
      # backslash would leave the row unterminated before \hline).
      " & & & \\\\",
      "\\hline",
      "\\hline",
      paste0(paste(c("Destination", colnames(TransitionTable)), collapse = "&"), "\\\\"),
      "\\cmidrule(ll){1-1} \\cmidrule(ll){2-2} \\cmidrule(ll){3-3} \\cmidrule(ll){4-4}"),
    endcodelist = list(
      "\\hline",
      "\\hline",
      "\\end{tabular}"),
    data = TransitionTable),
  file = "../analysis/tablesandfigures/release/switcher_transitions/switcher_transitions.tex",
  append = FALSE)

# Make table with only overall
# Select the margin column by name rather than by position, so the table is
# still correct if the number of plan-type columns changes.
total_col <- "Total"

capture.output(
  MakeLatexTable(
    startcodelist = list(
      "\\begin{tabular}{@{\\extracolsep{5pt}}lc}",
      " & \\\\",
      "\\hline",
      "\\hline",
      paste0("& ", total_col, " \\\\"),
      "\\cmidrule(ll){1-1} \\cmidrule(ll){2-2}"),
    endcodelist = list(
      "\\hline",
      "\\hline",
      "\\end{tabular}"),
    # Single-column matrix; the row names carry the destination labels.
    data = as.matrix(TransitionTable[, total_col])),
  file = "../analysis/tablesandfigures/release/switcher_transitions/switcher_transitions_overall.tex",
  append = FALSE)
  
# ~ # ~ # ~ # ~ # 
# Saving Data   #
# ~ # ~ # ~ # ~ #
# Keep only forced-out subscribers and write them to the current working
# directory.
forced_switchers <- SubDat[forced_out == 1]
fwrite(forced_switchers, file = "forced_switchers.csv")
print("Saving Complete")
